Sorting lists


In [5]:
x= ["duck","aardvark","crocodile", "emu", "bee"]

In [6]:
x.sort()

In [7]:
x


Out[7]:
['aardvark', 'bee', 'crocodile', 'duck', 'emu']

In [8]:
### sorted by descending order
sorted(x,reverse=True)


Out[8]:
['emu', 'duck', 'crocodile', 'bee', 'aardvark']

In [9]:
### sorted by second letter:
#sorted(x, key=??)

In [10]:
def get_second_letter(s):
    return s[1]

In [11]:
get_second_letter("cheese")


Out[11]:
'h'

In [12]:
sorted(x,key=get_second_letter) #key is a parameter, value is a function:get_second_letter


Out[12]:
['aardvark', 'bee', 'emu', 'crocodile', 'duck']

In [13]:
type(get_second_letter)


Out[13]:
function

lambda functions: a way of writing a function on a single line


In [14]:
# normal function: def nameofthefunction parameter: return expression
def get_second_letters(s):
    return s[1]

In [15]:
# the previous is the same as:
get_second_letter = lambda s:s[1]

In [16]:
type(lambda s:s[1])


Out[16]:
function

In [17]:
print("hello")


hello

In [18]:
sorted(x,key=lambda s:s[1])


Out[18]:
['aardvark', 'bee', 'emu', 'crocodile', 'duck']

tuple("rhymes with supple")

tuple is a kind of like a strict kind of list


In [19]:
t= (5,10,15)

In [20]:
type(t)


Out[20]:
tuple

In [21]:
t[0]


Out[21]:
5

In [22]:
for item in t:
    print(item*item)


25
100
225

In [23]:
#tuple is like a list but it can't be changed, it's called "immutable" data type
#one benefit is exactly that: it can't be changed.
#other benefit is that tuples are memory-efficient.

In [24]:
hello=[1,2,3]

In [25]:
foo=(1,2,3)

In [26]:
import sys
sys.getsizeof(hello)


Out[26]:
88

In [27]:
sys.getsizeof(foo)


Out[27]:
72

back to regular expressions for a moment

grouping with multiple matches in the same string


In [28]:
import re
test="one 1 two 2 three 3 four 4 five 5"
re.findall(r"\w+ \d",test)


Out[28]:
['one 1', 'two 2', 'three 3', 'four 4', 'five 5']

In [29]:
for item in re.findall(r"\w+ \d",test):
    x=item.split("")
    print(x[0])
    print(x[1])


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-29-60227970f8a8> in <module>()
      1 for item in re.findall(r"\w+ \d",test):
----> 2     x=item.split("")
      3     print(x[0])
      4     print(x[1])

ValueError: empty separator

In [ ]:
test="one 1 two 2 three 3 four 4 five 5"
re.findall(r"(\w+) (\d)",test)

In [ ]:
all_subjects = open("enronsubjects.txt").read()

In [ ]:
for item in re.findall(r"(\d{3})-(\d{3})-(\d{4})", all_subjects):
    print item[0]

In [ ]:
[item[0] for item in re.findall(r"(\d{3})-(\d{3})-(\d{4})", all_subjects)]

monetary amounts in the subject lines

match something like $10 m, k, b


In [ ]:
re.findall(r"\$(\d+) ?(\w+)", all_subjects)

In [ ]:
vals=[]
for item in re.findall(r"\$(\d+) ?([mMbBkK])", all_subjects):
    multiplier=item[1].lower()
    number_val=int(item[0])
    if multiplier=='k':
        number_val *= 1000
    elif multiplier=='m':
        number_val *= 1000000
    elif multiplier=='b':
        number_val *= 1000000000
    vals.append(number_val)
sum(vals)

substitution with regular expressions


In [ ]:
message = "this is a test, this is only a test"

In [ ]:
message.replace("this","that").replace("test","walrus") #.replace() replace part of a string

In [ ]:
re.findall(r"\d{3}-\d{3}-\d{4}", all_subjects)

In [ ]:
message="This is a test; this is only a test."
re.sub(r"[Tt]his", "that", message) #.sub():substitute a pattern

In [ ]:
re.sub(r"\b\w+\b", "WALRUS", message)

In [ ]:
anon = re.sub(r"\d{3}-\d{3}-\d{4}", "555-555-5555", all_subjects)

In [ ]:
re.findall(r".{,20}\d{3}-\d{3}-\d{4}.{,20}",anon)

In [ ]:
anon = re.sub(r"(\d{3}-\d{3}-\d{4}", r"\1-\2-XXXX", all_subjects)

HTML to SQL


In [32]:
from urllib.request import urlretrieve
urlretrieve("https://raw.githubusercontent.com/ledeprogram/data-and-databases/master/menupages-morningside-heights.html")


Out[32]:
('/var/folders/zh/j6d1c6bx17jcmkyybqlcnjlm0000gn/T/tmpb_7h3mb2',
 <http.client.HTTPMessage at 0x1132d2400>)

In [ ]:
#urlretrieve(url, filename)

store:

  • restaurant name
  • price ($
  • cuisine

research phase:

  • every restaurant has a that is a child of the tag with class search-results
  • restaurants are in tags with class name-address
  • restaurant names are in an tag inside that tag
  • restaurant price in a span inside a tag with class price
  • the cuisine of the restaurant is in a tag with no class, the fifth tag that is a child of the restaurant's '

targets:

  • list of dictionaries [
    {'name':"Brad's, 'price': 1,'cuisine':['coffee']}, 
    {'name':"Cafe Nana", 'price':0, 'cuisines':['Middle Eastern','Kosher']},
    ...
    
    ]

In [33]:
from bs4 import BeautifulSoup

In [34]:
raw_html = open("menupages-morningside-heights.html").read()
soup = BeautifulSoup(raw_html, "html.parser")

In [35]:
#just the name
search_table=soup.find("table", {'class':'search-results'})
table_body=search_table.find('tbody')
for tr_tag in table_body.find_all('tr'):
    name_address_tag = tr_tag.find('td', {'class':'name-address'})
    a_tag = name_address_tag.find('a')
    print(a_tag.string)


Milano Market
Massawa
China Place
Subsconscious
Famous Famiglia
Kitchenette
V & T Pizza
New Aroma
Peking Garden
Tom's Restaurant
Pisticci
Deluxe
Toast
Tom's Delicious Pizza
West Place
Che' Bella Pizza
Ajanta
Panino Sportivo Roma
Max Soha
Strokos Pizza
Camille's
Amsterdam Restaurant
Nussbaum & Wu
Amir's Grill
M2M - Morning To Midnight
The Mill
Le Monde
Melba's
Chuck E Cheese's
Haagen-Dazs
Oren's
Dinosaur Bar-B-Que
Symposium Greek Restaurant
Koronet Pizza
The Heights Bar & Grill
Cafe Nana
Hamilton Deli
Community Food & Juice
Haakon's Hall
El Porton
Brad's
Mel's Burger Bar
Bettolona
Five Guys
Nikko
Falafel on Broadway
Sushi Sushi
Insomnia Cookies
Cafe Amrita
Pinkberry
Artopolis Espresso Cafe
Max Caffe
Chipotle
Chokolat Patisserie
Chokolat Patisserie
Joe's G-H Deli
Joe the Art of Coffee
Levain Bakery
Silvana
Bier International
Vegenation
Flat Top
Kuro Kuma
Vinateria
Henan Cart
Vine
El Paso Truck
Mama's Fried Chicken and Pizza
Chapati House
Dig Inn Seasonal Market
Uncle Luoyang
Pita Grill
Kissaten Jin
Serengeti Teas & Spices
Lighthouse Fish Market & Restaurant
Lolo's Seafood Shack
Harmony
Bernheim & Schwartz
BTH (By The Hudson)
Empanadas Monumental
Friedman's
176 Presbyterian Deli
Nacho Mama's
Streetbird Rotisserie
Colum Chinese Restaurant
Flacoz Tacoz
sweetgreen
La Salle Dumpling Room

In [36]:
search_table=soup.find("table", {'class':'search-results'})
table_body=search_table.find('tbody')
for tr_tag in table_body.find_all('tr'):
    # get the restaurant name from the a inside a td
    name_address_tag = tr_tag.find('td', {'class':'name-address'})
    a_tag = name_address_tag.find('a')
    restaurant_name=a_tag.string
    #get the price from the span if present
    price_tag = tr_tag.find('td', {'class':'price'})
    price_span_tag = price_tag.find('span')
    if price_span_tag:
        price=int(price_span_tag.string)
    else:
        price=0
    print(restaurant_name, price)


Milano Market 2
Massawa 0
China Place 0
Subsconscious 0
Famous Famiglia 0
Kitchenette 0
V & T Pizza 0
New Aroma 1
Peking Garden 1
Tom's Restaurant 1
Pisticci 1
Deluxe 1
Toast 2
Tom's Delicious Pizza 2
West Place 1
Che' Bella Pizza 0
Ajanta 0
Panino Sportivo Roma 2
Max Soha 2
Strokos Pizza 3
Camille's 1
Amsterdam Restaurant 1
Nussbaum & Wu 0
Amir's Grill 1
M2M - Morning To Midnight 1
The Mill 2
Le Monde 1
Melba's 1
Chuck E Cheese's 0
Haagen-Dazs 1
Oren's 0
Dinosaur Bar-B-Que 3
Symposium Greek Restaurant 0
Koronet Pizza 1
The Heights Bar & Grill 1
Cafe Nana 0
Hamilton Deli 0
Community Food & Juice 0
Haakon's Hall 0
El Porton 0
Brad's 1
Mel's Burger Bar 2
Bettolona 1
Five Guys 1
Nikko 2
Falafel on Broadway 1
Sushi Sushi 1
Insomnia Cookies 1
Cafe Amrita 1
Pinkberry 2
Artopolis Espresso Cafe 1
Max Caffe 2
Chipotle 1
Chokolat Patisserie 5
Chokolat Patisserie 1
Joe's G-H Deli 2
Joe the Art of Coffee 1
Levain Bakery 2
Silvana 1
Bier International 1
Vegenation 1
Flat Top 4
Kuro Kuma 1
Vinateria 2
Henan Cart 1
Vine 2
El Paso Truck 1
Mama's Fried Chicken and Pizza 1
Chapati House 2
Dig Inn Seasonal Market 1
Uncle Luoyang 1
Pita Grill 2
Kissaten Jin 1
Serengeti Teas & Spices 2
Lighthouse Fish Market & Restaurant 3
Lolo's Seafood Shack 2
Harmony 1
Bernheim & Schwartz 2
BTH (By The Hudson) 3
Empanadas Monumental 0
Friedman's 0
176 Presbyterian Deli 0
Nacho Mama's 0
Streetbird Rotisserie 0
Colum Chinese Restaurant 0
Flacoz Tacoz 0
sweetgreen 0
La Salle Dumpling Room 0

In [37]:
def get_name(tag):
    return"TEST RESTAURANT"
def get_price(tag):
    return"999999"

In [38]:
search_table=soup.find("table", {'class':'search-results'})
table_body=search_table.find('tbody')
for tr_tag in table_body.find_all('tr'):
    restaurant_name = get_name(tr_tag)
    price = get_price(tr_tag)
    print(restaurant_name, price)


TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999
TEST RESTAURANT 999999

In [39]:
def get_name(tr_tag):
    name_address_tag = tr_tag.find('td', {'class':'name-address'})
    a_tag = name_address_tag.find('a')
    restaurant_name=a_tag.string
    return restaurant_name
def get_price(tr_tag):
    price_tag = tr_tag.find('td', {'class':'price'})
    price_span_tag = price_tag.find('span')
    if price_span_tag:
        price=int(price_span_tag.string)
    else:
        price=0
    return price
def get_cuisines(tr_tag):
    all_td_tags=tr_tag.find_all('td')
    cuisine_tag = all_td_tags[4]
    print(cuisine_tag)
    cuisines = cuisine_tag.string
    if cuisines:
        cuisines_list= cuisines.split(", ")
    else:
        cuisine_list=[]
    return cuisines_list

In [40]:
restaurants=[]
search_table=soup.find("table", {'class':'search-results'})
table_body=search_table.find('tbody')
for tr_tag in table_body.find_all('tr'):
    restaurant_name = get_name(tr_tag)
    price = get_price(tr_tag)
    cuisines=get_cuisines(tr_tag)
    rest_dict = {'name':restaurant_name, 'price':price, 'cuisines':cuisines}
    restaurants.append(rest_dict)
restaurants


<td>deli, sandwiches</td>
<td>ethiopian, african</td>
<td>chinese, japanese, sushi</td>
<td>cheese-steaks, deli, sandwiches, salads</td>
<td>italian, pizza</td>
<td>bakery-pastries, american, desserts</td>
<td>italian, pizza</td>
<td>chinese</td>
<td>chinese</td>
<td>diner</td>
<td>italian</td>
<td>diner, american</td>
<td>american, bar-food</td>
<td>italian, pizza</td>
<td>chinese</td>
<td>italian, pizza</td>
<td>indian</td>
<td>italian, coffee-tea, sandwiches</td>
<td>italian</td>
<td>deli, pizza, sandwiches, chicken</td>
<td>american</td>
<td>american-new, tapas</td>
<td>deli, sandwiches, bagels, salads</td>
<td>middle-eastern</td>
<td>japanese, sushi, deli, sandwiches</td>
<td>korean</td>
<td>french, bistro</td>
<td>american-new, soul-food</td>
<td>pizza, american</td>
<td>desserts</td>
<td>coffee-tea, sandwiches</td>
<td>barbecue, soul-food</td>
<td>greek, vegetarian</td>
<td>pizza</td>
<td>southwestern, tapas, bar-food</td>
<td>middle-eastern, kosher</td>
<td>deli, sandwiches</td>
<td>american-new, smoothies-juices, vegetarian, healthy, localorganic</td>
<td>american, scandinavian</td>
<td>mexican</td>
<td>coffee-tea</td>
<td>burgers, bar-food</td>
<td>italian, pizza, localorganic</td>
<td>burgers, hot-dogs</td>
<td>chinese, japanese, sushi, teahouses</td>
<td>middle-eastern</td>
<td>japanese, sushi</td>
<td>bakery-pastries, desserts</td>
<td>coffee-tea, sandwiches</td>
<td>desserts</td>
<td>coffee-tea, sandwiches, crepes</td>
<td>coffee-tea, sandwiches</td>
<td>mexican</td>
<td>bakery-pastries, desserts</td>
<td>bakery-pastries, coffee-tea, desserts</td>
<td>deli, sandwiches</td>
<td>coffee-tea</td>
<td>bakery-pastries, desserts</td>
<td>middle-eastern, vegetarian</td>
<td>german, eclectic, bar-food</td>
<td>indian, vegetarian</td>
<td>american-new, bistro</td>
<td>coffee-tea</td>
<td>american-new</td>
<td>chinese</td>
<td>japanese, sushi, thai</td>
<td>mexican</td>
<td>pizza, chicken</td>
<td>indian</td>
<td>american, localorganic</td>
<td>chinese</td>
<td>middle-eastern</td>
<td>japanese</td>
<td>coffee-tea, teahouses</td>
<td>seafood, wings, fish--chips</td>
<td>seafood</td>
<td>sandwiches, salads, teahouses</td>
<td>american, gastropub</td>
<td>american-new</td>
<td></td>
---------------------------------------------------------------------------
UnboundLocalError                         Traceback (most recent call last)
<ipython-input-40-df443e7e8843> in <module>()
      5     restaurant_name = get_name(tr_tag)
      6     price = get_price(tr_tag)
----> 7     cuisines=get_cuisines(tr_tag)
      8     rest_dict = {'name':restaurant_name, 'price':price, 'cuisines':cuisines}
      9     restaurants.append(rest_dict)

<ipython-input-39-3591eccc826a> in get_cuisines(tr_tag)
     21     else:
     22         cuisine_list=[]
---> 23     return cuisines_list

UnboundLocalError: local variable 'cuisines_list' referenced before assignment

In [41]:
import pandas as pd
df = pd.DataFrame(restaurants)

In [42]:
df[df['price'] >2]


Out[42]:
cuisines name price
19 [deli, pizza, sandwiches, chicken] Strokos Pizza 3
31 [barbecue, soul-food] Dinosaur Bar-B-Que 3
53 [bakery-pastries, desserts] Chokolat Patisserie 5
61 [american-new, bistro] Flat Top 4
74 [seafood, wings, fish--chips] Lighthouse Fish Market & Restaurant 3
78 [american-new] BTH (By The Hudson) 3

Putting this stuff into SQL

  • "schema" -> desiging the tables
  • what table(s) do we need?
  • what should those tables have in them? (columns and data types)

  • data normalization normal form

  • entities

  • restaurant name, price, list of cuisines

  • which cuisines a restaurant is associated with

restaurant table: 'id' (unique integer identifying the restaurant)'name'(string with the restaurant's name) *'price'(integer that corresponds to the number of dollar signs)

cuisine table:

  • 'restaurant_id' (number associated with restaurant)
  • 'kind' (string that identifies the cuisine type itself) ...

sample entry from restaurant table restaurant_id: 4 name: Brad's price:1 ... ... sameple entry from cuisine table restaurant_id: 4 kind: coffee-tea

restaurant_id: 4 kind: seafood ... select restaurant.restaurant_id from restaurant join cuisine on restaurant.restaurant_id=cuisine.restaurant_id

"set up ohase"-> creating database and creating tables "one time"->psql "working with data phase"-> inserting records, selecting stuff->python

sql_data_types like

  • int_Integer *varvhar(n) string with length n
  • numeric number/decimal
  • "serial"-> ubteger that is automatically assigned

connect to the database


In [43]:
import pg8000
conn= pg8000.connect(database="menupages")

In [44]:
type(conn)


Out[44]:
pg8000.core.Connection

In [63]:
conn.rollback() #execyte this whever you make a SQL problem

In [46]:
cursor = conn.cursor()

cursor object:

.execute()<- execute a SQL statement.fetchone()<- fetches the first record of the results of a statement(as a list) *.fetchall()<- returns ALL the rows of the results of a statement(as a list)


In [47]:
cursor.execute("INSERT INTO restaurant(name,price) VALUES('Good Food Place',3)")
conn.commit()

In [48]:
cursor.execute("SELECT * FROM restaurant")
for item in cursor.fetchall():
    print(item)


[4, 'Panino Sportivo Roma', 2]
[5, 'Good Food Place', 3]

In [52]:
cursor.execute(
   "INSERT INTO restaurant(name,price) VALUES('Palace of Vegan Nosh',3) RETURNING id")
results= cursor.fetchone() #return a list
conn.commit()

In [53]:
rowid=results[0]

In [54]:
rowid


Out[54]:
6

quoting and parameters in SQL


In [55]:
#will not work:
cursor.execute(
 "INSERT INTO restaurant(name,price) VALUES ('Brad's', 1) RETURNING id")
rowid=cursor.fetchone()[0]
conn.commit()


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/usr/local/lib/python3.5/site-packages/pg8000/core.py in execute(self, cursor, operation, vals)
   1896         try:
-> 1897             ps = cache['ps'][key]
   1898             cursor.ps = ps

KeyError: ("INSERT INTO restaurant(name,price) VALUES ('Brad's', 1) RETURNING id", ())

During handling of the above exception, another exception occurred:

ProgrammingError                          Traceback (most recent call last)
<ipython-input-55-432bd528a0a3> in <module>()
      1 cursor.execute(
----> 2  "INSERT INTO restaurant(name,price) VALUES ('Brad's', 1) RETURNING id")
      3 rowid=cursor.fetchone()[0]
      4 conn.commit()

/usr/local/lib/python3.5/site-packages/pg8000/core.py in execute(self, operation, args, stream)
    905                 if not self._c.in_transaction and not self._c.autocommit:
    906                     self._c.execute(self, "begin transaction", None)
--> 907                 self._c.execute(self, operation, args)
    908         except AttributeError as e:
    909             if self._c is None:

/usr/local/lib/python3.5/site-packages/pg8000/core.py in execute(self, cursor, operation, vals)
   1944                 raise OperationalError(str(e))
   1945 
-> 1946             self.handle_messages(cursor)
   1947 
   1948             # We've got row_desc that allows us to identify what we're

/usr/local/lib/python3.5/site-packages/pg8000/core.py in handle_messages(self, cursor)
   2092 
   2093         if self.error is not None:
-> 2094             raise self.error
   2095 
   2096     # Byte1('C') - Identifies the message as a close command.

ProgrammingError: ('ERROR', '42601', 'syntax error at or near "s"', '50', 'scan.l', '1081', 'scanner_yyerror', '', '')

In [56]:
#SQL Injection attack:
restaurant = "'Restaurant'); DELETE FROM restaurant;"

string in python -> "quote" "escape"-> valid sql statement very weird and difficult and arcane


In [64]:
rest_insert = "INSERT INTO restaurant(name, price) VALUES (%s, %s)" #%S placeholder
cursor.execute(rest_insert, ["Brad's", 1])
# pg8000 does the work: "INSERT INTO restaurant (name,price) VALUES ('Brad\'s',1)"
conn.commit()

Insert a restaurant and its cuisines


In [65]:
cursor.execute("INSERT INTO restaurant(name, price) VALUES(%s, %s) RETURNING id",["Test Restaurant",2])
rowid=cursor.fetchone()[0]
conn.commit()

In [66]:
rowid


Out[66]:
8

In [67]:
# let's say Test Restaurant serves fondue and casseroles
cuisine_insert = "INSERT INTO cuisine(restaurant_id, kind) VALUES (%s, %s)"
cursor.execute(cuisine_insert, [rowid, "fondue"])
cursor.execute(cuisine_insert, [rowid,"casseroles"])
conn.commit()

In [ ]:
### Insert Many restaurants

In [68]:
restaurants


Out[68]:
[{'cuisines': ['deli', 'sandwiches'], 'name': 'Milano Market', 'price': 2},
 {'cuisines': ['ethiopian', 'african'], 'name': 'Massawa', 'price': 0},
 {'cuisines': ['chinese', 'japanese', 'sushi'],
  'name': 'China Place',
  'price': 0},
 {'cuisines': ['cheese-steaks', 'deli', 'sandwiches', 'salads'],
  'name': 'Subsconscious',
  'price': 0},
 {'cuisines': ['italian', 'pizza'], 'name': 'Famous Famiglia', 'price': 0},
 {'cuisines': ['bakery-pastries', 'american', 'desserts'],
  'name': 'Kitchenette',
  'price': 0},
 {'cuisines': ['italian', 'pizza'], 'name': 'V & T Pizza', 'price': 0},
 {'cuisines': ['chinese'], 'name': 'New Aroma', 'price': 1},
 {'cuisines': ['chinese'], 'name': 'Peking Garden', 'price': 1},
 {'cuisines': ['diner'], 'name': "Tom's Restaurant", 'price': 1},
 {'cuisines': ['italian'], 'name': 'Pisticci', 'price': 1},
 {'cuisines': ['diner', 'american'], 'name': 'Deluxe', 'price': 1},
 {'cuisines': ['american', 'bar-food'], 'name': 'Toast', 'price': 2},
 {'cuisines': ['italian', 'pizza'],
  'name': "Tom's Delicious Pizza",
  'price': 2},
 {'cuisines': ['chinese'], 'name': 'West Place', 'price': 1},
 {'cuisines': ['italian', 'pizza'], 'name': "Che' Bella Pizza", 'price': 0},
 {'cuisines': ['indian'], 'name': 'Ajanta', 'price': 0},
 {'cuisines': ['italian', 'coffee-tea', 'sandwiches'],
  'name': 'Panino Sportivo Roma',
  'price': 2},
 {'cuisines': ['italian'], 'name': 'Max Soha', 'price': 2},
 {'cuisines': ['deli', 'pizza', 'sandwiches', 'chicken'],
  'name': 'Strokos Pizza',
  'price': 3},
 {'cuisines': ['american'], 'name': "Camille's", 'price': 1},
 {'cuisines': ['american-new', 'tapas'],
  'name': 'Amsterdam Restaurant',
  'price': 1},
 {'cuisines': ['deli', 'sandwiches', 'bagels', 'salads'],
  'name': 'Nussbaum & Wu',
  'price': 0},
 {'cuisines': ['middle-eastern'], 'name': "Amir's Grill", 'price': 1},
 {'cuisines': ['japanese', 'sushi', 'deli', 'sandwiches'],
  'name': 'M2M - Morning To Midnight',
  'price': 1},
 {'cuisines': ['korean'], 'name': 'The Mill', 'price': 2},
 {'cuisines': ['french', 'bistro'], 'name': 'Le Monde', 'price': 1},
 {'cuisines': ['american-new', 'soul-food'], 'name': "Melba's", 'price': 1},
 {'cuisines': ['pizza', 'american'], 'name': "Chuck E Cheese's", 'price': 0},
 {'cuisines': ['desserts'], 'name': 'Haagen-Dazs', 'price': 1},
 {'cuisines': ['coffee-tea', 'sandwiches'], 'name': "Oren's", 'price': 0},
 {'cuisines': ['barbecue', 'soul-food'],
  'name': 'Dinosaur Bar-B-Que',
  'price': 3},
 {'cuisines': ['greek', 'vegetarian'],
  'name': 'Symposium Greek Restaurant',
  'price': 0},
 {'cuisines': ['pizza'], 'name': 'Koronet Pizza', 'price': 1},
 {'cuisines': ['southwestern', 'tapas', 'bar-food'],
  'name': 'The Heights Bar & Grill',
  'price': 1},
 {'cuisines': ['middle-eastern', 'kosher'], 'name': 'Cafe Nana', 'price': 0},
 {'cuisines': ['deli', 'sandwiches'], 'name': 'Hamilton Deli', 'price': 0},
 {'cuisines': ['american-new',
   'smoothies-juices',
   'vegetarian',
   'healthy',
   'localorganic'],
  'name': 'Community Food & Juice',
  'price': 0},
 {'cuisines': ['american', 'scandinavian'],
  'name': "Haakon's Hall",
  'price': 0},
 {'cuisines': ['mexican'], 'name': 'El Porton', 'price': 0},
 {'cuisines': ['coffee-tea'], 'name': "Brad's", 'price': 1},
 {'cuisines': ['burgers', 'bar-food'], 'name': "Mel's Burger Bar", 'price': 2},
 {'cuisines': ['italian', 'pizza', 'localorganic'],
  'name': 'Bettolona',
  'price': 1},
 {'cuisines': ['burgers', 'hot-dogs'], 'name': 'Five Guys', 'price': 1},
 {'cuisines': ['chinese', 'japanese', 'sushi', 'teahouses'],
  'name': 'Nikko',
  'price': 2},
 {'cuisines': ['middle-eastern'], 'name': 'Falafel on Broadway', 'price': 1},
 {'cuisines': ['japanese', 'sushi'], 'name': 'Sushi Sushi', 'price': 1},
 {'cuisines': ['bakery-pastries', 'desserts'],
  'name': 'Insomnia Cookies',
  'price': 1},
 {'cuisines': ['coffee-tea', 'sandwiches'], 'name': 'Cafe Amrita', 'price': 1},
 {'cuisines': ['desserts'], 'name': 'Pinkberry', 'price': 2},
 {'cuisines': ['coffee-tea', 'sandwiches', 'crepes'],
  'name': 'Artopolis Espresso Cafe',
  'price': 1},
 {'cuisines': ['coffee-tea', 'sandwiches'], 'name': 'Max Caffe', 'price': 2},
 {'cuisines': ['mexican'], 'name': 'Chipotle', 'price': 1},
 {'cuisines': ['bakery-pastries', 'desserts'],
  'name': 'Chokolat Patisserie',
  'price': 5},
 {'cuisines': ['bakery-pastries', 'coffee-tea', 'desserts'],
  'name': 'Chokolat Patisserie',
  'price': 1},
 {'cuisines': ['deli', 'sandwiches'], 'name': "Joe's G-H Deli", 'price': 2},
 {'cuisines': ['coffee-tea'], 'name': 'Joe the Art of Coffee', 'price': 1},
 {'cuisines': ['bakery-pastries', 'desserts'],
  'name': 'Levain Bakery',
  'price': 2},
 {'cuisines': ['middle-eastern', 'vegetarian'], 'name': 'Silvana', 'price': 1},
 {'cuisines': ['german', 'eclectic', 'bar-food'],
  'name': 'Bier International',
  'price': 1},
 {'cuisines': ['indian', 'vegetarian'], 'name': 'Vegenation', 'price': 1},
 {'cuisines': ['american-new', 'bistro'], 'name': 'Flat Top', 'price': 4},
 {'cuisines': ['coffee-tea'], 'name': 'Kuro Kuma', 'price': 1},
 {'cuisines': ['american-new'], 'name': 'Vinateria', 'price': 2},
 {'cuisines': ['chinese'], 'name': 'Henan Cart', 'price': 1},
 {'cuisines': ['japanese', 'sushi', 'thai'], 'name': 'Vine', 'price': 2},
 {'cuisines': ['mexican'], 'name': 'El Paso Truck', 'price': 1},
 {'cuisines': ['pizza', 'chicken'],
  'name': "Mama's Fried Chicken and Pizza",
  'price': 1},
 {'cuisines': ['indian'], 'name': 'Chapati House', 'price': 2},
 {'cuisines': ['american', 'localorganic'],
  'name': 'Dig Inn Seasonal Market',
  'price': 1},
 {'cuisines': ['chinese'], 'name': 'Uncle Luoyang', 'price': 1},
 {'cuisines': ['middle-eastern'], 'name': 'Pita Grill', 'price': 2},
 {'cuisines': ['japanese'], 'name': 'Kissaten Jin', 'price': 1},
 {'cuisines': ['coffee-tea', 'teahouses'],
  'name': 'Serengeti Teas & Spices',
  'price': 2},
 {'cuisines': ['seafood', 'wings', 'fish--chips'],
  'name': 'Lighthouse Fish Market & Restaurant',
  'price': 3},
 {'cuisines': ['seafood'], 'name': "Lolo's Seafood Shack", 'price': 2},
 {'cuisines': ['sandwiches', 'salads', 'teahouses'],
  'name': 'Harmony',
  'price': 1},
 {'cuisines': ['american', 'gastropub'],
  'name': 'Bernheim & Schwartz',
  'price': 2},
 {'cuisines': ['american-new'], 'name': 'BTH (By The Hudson)', 'price': 3}]

In [73]:
#error
rest_insert = "INSERT INTO restaurant(name,price)VALUES(%s,%s)"
for item in restaurants:
    cursor.execute(rest_insert, [item['name'], item['price']])
conn.commit()


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/usr/local/lib/python3.5/site-packages/pg8000/core.py in make_params(self, values)
   1854             try:
-> 1855                 params.append(self.py_types[typ])
   1856             except KeyError:

KeyError: <class 'bs4.element.NavigableString'>

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
/usr/local/lib/python3.5/site-packages/pg8000/core.py in make_params(self, values)
   1857                 try:
-> 1858                     params.append(self.inspect_funcs[typ](value))
   1859                 except KeyError as e:

KeyError: <class 'bs4.element.NavigableString'>

During handling of the above exception, another exception occurred:

NotSupportedError                         Traceback (most recent call last)
<ipython-input-73-8ae20aaa97d8> in <module>()
      2 rest_insert = "INSERT INTO restaurant(name,price)VALUES(%s,%s)"
      3 for item in restaurants:
----> 4     cursor.execute(rest_insert, [item['name'], item['price']])
      5 conn.commit()

/usr/local/lib/python3.5/site-packages/pg8000/core.py in execute(self, operation, args, stream)
    905                 if not self._c.in_transaction and not self._c.autocommit:
    906                     self._c.execute(self, "begin transaction", None)
--> 907                 self._c.execute(self, operation, args)
    908         except AttributeError as e:
    909             if self._c is None:

/usr/local/lib/python3.5/site-packages/pg8000/core.py in execute(self, cursor, operation, vals)
   1891 
   1892         args = make_args(vals)
-> 1893         params = self.make_params(args)
   1894         key = operation, params
   1895 

/usr/local/lib/python3.5/site-packages/pg8000/core.py in make_params(self, values)
   1859                 except KeyError as e:
   1860                     raise NotSupportedError(
-> 1861                         "type " + str(e) + "not mapped to pg type")
   1862         return tuple(params)
   1863 

NotSupportedError: type <class 'bs4.element.NavigableString'>not mapped to pg type

In [70]:
first=restaurants[0]
first


Out[70]:
{'cuisines': ['deli', 'sandwiches'], 'name': 'Milano Market', 'price': 2}

In [71]:
type(first['name'])


Out[71]:
bs4.element.NavigableString

so what happened? Why isn't this just a string?

whenver you use the .string attribute of a Beatufil Soup tag object, the type of that value is bs4.element.NavigableString

fortunately, there's an esay fix:'str(val)' to convert that value into a string use str(value)


In [72]:
rest_insert = "INSERT INTO restaurant(name,price)VALUES(%s,%s)"
for item in restaurants:
    cursor.execute(rest_insert, [str(item['name']), item['price']])
conn.commit()

In [ ]:
# restaurants
for item in restaurants
sql to insert a restaurant
commit()

step2: for rest in rest: 
for cuisine in cuisine
sql to insert

inserting both restaurants and their cuisines


In [76]:
rest_insert="INSERT INTO restaurant(name,price)VALUES(%s,%s) RETURNING id"
cuisine_insert="INSERT INTO CUISINE(restaurant_id, kind)VALUES(%s,%s)"
for item in restaurants:
    #insert restaurant, RETURNING id
    print("inserting restaurant", item['name'])
    cursor.execute(rest_insert, [str(item['name']), item['price']])
    rowid=cursor.fetchone()[0]
    for cuisine in item['cuisines']:
        print(" -inserting cuisine",cuisine)
        cursor.execute(cuisine_insert, [rowid, str(cuisine)])
        #insert restaurant
conn.commit()


inserting restaurant Milano Market
 -inserting cuisine deli
 -inserting cuisine sandwiches
inserting restaurant Massawa
 -inserting cuisine ethiopian
 -inserting cuisine african
inserting restaurant China Place
 -inserting cuisine chinese
 -inserting cuisine japanese
 -inserting cuisine sushi
inserting restaurant Subsconscious
 -inserting cuisine cheese-steaks
 -inserting cuisine deli
 -inserting cuisine sandwiches
 -inserting cuisine salads
inserting restaurant Famous Famiglia
 -inserting cuisine italian
 -inserting cuisine pizza
inserting restaurant Kitchenette
 -inserting cuisine bakery-pastries
 -inserting cuisine american
 -inserting cuisine desserts
inserting restaurant V & T Pizza
 -inserting cuisine italian
 -inserting cuisine pizza
inserting restaurant New Aroma
 -inserting cuisine chinese
inserting restaurant Peking Garden
 -inserting cuisine chinese
inserting restaurant Tom's Restaurant
 -inserting cuisine diner
inserting restaurant Pisticci
 -inserting cuisine italian
inserting restaurant Deluxe
 -inserting cuisine diner
 -inserting cuisine american
inserting restaurant Toast
 -inserting cuisine american
 -inserting cuisine bar-food
inserting restaurant Tom's Delicious Pizza
 -inserting cuisine italian
 -inserting cuisine pizza
inserting restaurant West Place
 -inserting cuisine chinese
inserting restaurant Che' Bella Pizza
 -inserting cuisine italian
 -inserting cuisine pizza
inserting restaurant Ajanta
 -inserting cuisine indian
inserting restaurant Panino Sportivo Roma
 -inserting cuisine italian
 -inserting cuisine coffee-tea
 -inserting cuisine sandwiches
inserting restaurant Max Soha
 -inserting cuisine italian
inserting restaurant Strokos Pizza
 -inserting cuisine deli
 -inserting cuisine pizza
 -inserting cuisine sandwiches
 -inserting cuisine chicken
inserting restaurant Camille's
 -inserting cuisine american
inserting restaurant Amsterdam Restaurant
 -inserting cuisine american-new
 -inserting cuisine tapas
inserting restaurant Nussbaum & Wu
 -inserting cuisine deli
 -inserting cuisine sandwiches
 -inserting cuisine bagels
 -inserting cuisine salads
inserting restaurant Amir's Grill
 -inserting cuisine middle-eastern
inserting restaurant M2M - Morning To Midnight
 -inserting cuisine japanese
 -inserting cuisine sushi
 -inserting cuisine deli
 -inserting cuisine sandwiches
inserting restaurant The Mill
 -inserting cuisine korean
inserting restaurant Le Monde
 -inserting cuisine french
 -inserting cuisine bistro
inserting restaurant Melba's
 -inserting cuisine american-new
 -inserting cuisine soul-food
inserting restaurant Chuck E Cheese's
 -inserting cuisine pizza
 -inserting cuisine american
inserting restaurant Haagen-Dazs
 -inserting cuisine desserts
inserting restaurant Oren's
 -inserting cuisine coffee-tea
 -inserting cuisine sandwiches
inserting restaurant Dinosaur Bar-B-Que
 -inserting cuisine barbecue
 -inserting cuisine soul-food
inserting restaurant Symposium Greek Restaurant
 -inserting cuisine greek
 -inserting cuisine vegetarian
inserting restaurant Koronet Pizza
 -inserting cuisine pizza
inserting restaurant The Heights Bar & Grill
 -inserting cuisine southwestern
 -inserting cuisine tapas
 -inserting cuisine bar-food
inserting restaurant Cafe Nana
 -inserting cuisine middle-eastern
 -inserting cuisine kosher
inserting restaurant Hamilton Deli
 -inserting cuisine deli
 -inserting cuisine sandwiches
inserting restaurant Community Food & Juice
 -inserting cuisine american-new
 -inserting cuisine smoothies-juices
 -inserting cuisine vegetarian
 -inserting cuisine healthy
 -inserting cuisine localorganic
inserting restaurant Haakon's Hall
 -inserting cuisine american
 -inserting cuisine scandinavian
inserting restaurant El Porton
 -inserting cuisine mexican
inserting restaurant Brad's
 -inserting cuisine coffee-tea
inserting restaurant Mel's Burger Bar
 -inserting cuisine burgers
 -inserting cuisine bar-food
inserting restaurant Bettolona
 -inserting cuisine italian
 -inserting cuisine pizza
 -inserting cuisine localorganic
inserting restaurant Five Guys
 -inserting cuisine burgers
 -inserting cuisine hot-dogs
inserting restaurant Nikko
 -inserting cuisine chinese
 -inserting cuisine japanese
 -inserting cuisine sushi
 -inserting cuisine teahouses
inserting restaurant Falafel on Broadway
 -inserting cuisine middle-eastern
inserting restaurant Sushi Sushi
 -inserting cuisine japanese
 -inserting cuisine sushi
inserting restaurant Insomnia Cookies
 -inserting cuisine bakery-pastries
 -inserting cuisine desserts
inserting restaurant Cafe Amrita
 -inserting cuisine coffee-tea
 -inserting cuisine sandwiches
inserting restaurant Pinkberry
 -inserting cuisine desserts
inserting restaurant Artopolis Espresso Cafe
 -inserting cuisine coffee-tea
 -inserting cuisine sandwiches
 -inserting cuisine crepes
inserting restaurant Max Caffe
 -inserting cuisine coffee-tea
 -inserting cuisine sandwiches
inserting restaurant Chipotle
 -inserting cuisine mexican
inserting restaurant Chokolat Patisserie
 -inserting cuisine bakery-pastries
 -inserting cuisine desserts
inserting restaurant Chokolat Patisserie
 -inserting cuisine bakery-pastries
 -inserting cuisine coffee-tea
 -inserting cuisine desserts
inserting restaurant Joe's G-H Deli
 -inserting cuisine deli
 -inserting cuisine sandwiches
inserting restaurant Joe the Art of Coffee
 -inserting cuisine coffee-tea
inserting restaurant Levain Bakery
 -inserting cuisine bakery-pastries
 -inserting cuisine desserts
inserting restaurant Silvana
 -inserting cuisine middle-eastern
 -inserting cuisine vegetarian
inserting restaurant Bier International
 -inserting cuisine german
 -inserting cuisine eclectic
 -inserting cuisine bar-food
inserting restaurant Vegenation
 -inserting cuisine indian
 -inserting cuisine vegetarian
inserting restaurant Flat Top
 -inserting cuisine american-new
 -inserting cuisine bistro
inserting restaurant Kuro Kuma
 -inserting cuisine coffee-tea
inserting restaurant Vinateria
 -inserting cuisine american-new
inserting restaurant Henan Cart
 -inserting cuisine chinese
inserting restaurant Vine
 -inserting cuisine japanese
 -inserting cuisine sushi
 -inserting cuisine thai
inserting restaurant El Paso Truck
 -inserting cuisine mexican
inserting restaurant Mama's Fried Chicken and Pizza
 -inserting cuisine pizza
 -inserting cuisine chicken
inserting restaurant Chapati House
 -inserting cuisine indian
inserting restaurant Dig Inn Seasonal Market
 -inserting cuisine american
 -inserting cuisine localorganic
inserting restaurant Uncle Luoyang
 -inserting cuisine chinese
inserting restaurant Pita Grill
 -inserting cuisine middle-eastern
inserting restaurant Kissaten Jin
 -inserting cuisine japanese
inserting restaurant Serengeti Teas & Spices
 -inserting cuisine coffee-tea
 -inserting cuisine teahouses
inserting restaurant Lighthouse Fish Market & Restaurant
 -inserting cuisine seafood
 -inserting cuisine wings
 -inserting cuisine fish--chips
inserting restaurant Lolo's Seafood Shack
 -inserting cuisine seafood
inserting restaurant Harmony
 -inserting cuisine sandwiches
 -inserting cuisine salads
 -inserting cuisine teahouses
inserting restaurant Bernheim & Schwartz
 -inserting cuisine american
 -inserting cuisine gastropub
inserting restaurant BTH (By The Hudson)
 -inserting cuisine american-new

In [ ]: